import pybedtools

# Enhancer sets used throughout this script, each loaded as a BedTool.
fantom5_enhancers = pybedtools.BedTool("/osc-fs_home/mdehoon/Data/Fantom5/Enhancers/F5.hg38.enhancers.bed.gz")
roadmap_enhancers = pybedtools.BedTool("/osc-fs_home/mdehoon/Data/RoadmapEpigenomics/enhancers.bed")
cage_enhancers = pybedtools.BedTool("enhancer_predictions/enhancers.CAGE.bed")
hiseq_enhancers = pybedtools.BedTool("enhancer_predictions/enhancers.HiSeq.bed")

# For each reference set (Roadmap Epigenomics first, then FANTOM5), count the
# HiSeq enhancers returned by intersect(..., u=True) and print that count as
# an absolute number and as a percentage of all HiSeq enhancers.
hiseq_number = len(hiseq_enhancers)
for message, reference in (
    ("Number of HiSeq enhancers overlapping Roadmap Epigenomics enhancer regions: %d out of %d (%.2f%%)", roadmap_enhancers),
    ("Number of HiSeq enhancers overlapping FANTOM5 enhancers: %d out of %d (%.2f%%)", fantom5_enhancers),
):
    overlapping = hiseq_enhancers.intersect(reference, u=True)
    overlap = len(overlapping)
    percentage = 100*overlap/hiseq_number
    print(message % (overlap, hiseq_number, percentage))


# Number of bases kept on each side of the center position; every output
# record spans [center - ENHANCER_FLANK, center + ENHANCER_FLANK + 1).
ENHANCER_FLANK = 200


def _centered_interval(chrom, center, name):
    """Return a 4-column interval on `chrom` centered on `center`.

    The interval runs from center - ENHANCER_FLANK to
    center + ENHANCER_FLANK + 1 and carries `name` in its fourth column.
    """
    start = center - ENHANCER_FLANK
    end = center + ENHANCER_FLANK + 1
    return pybedtools.create_interval_from_list([chrom, str(start), str(end), name])


def _write_labeled(stream, enhancers, label):
    """Write recentered copies of `enhancers` to `stream`; return the count.

    Each record is centered on the value in its 7th column (thickStart) and
    renamed to "<label>|<original name>". Columns beyond the 4th are dropped.
    """
    counter = 0
    for enhancer in enhancers:
        fields = enhancer.fields
        center = int(fields[6])  # thickStart
        name = label + "|" + enhancer.name
        stream.write(str(_centered_interval(fields[0], center, name)))
        counter += 1
    return counter


def _write_both(stream, enhancers):
    """Write recentered copies of merged `enhancers` to `stream`; return the count.

    Each input record must have exactly 4 columns, the 4th holding the center
    position (the thickStart value selected when the regions were merged).
    The output name is "Both|chrom:start-end", built from the coordinates of
    the merged region before recentering.
    """
    counter = 0
    for enhancer in enhancers:
        fields = enhancer.fields
        assert len(fields) == 4
        center = int(fields[3])  # thickStart
        name = "Both|%s:%d-%s" % (enhancer.chrom, enhancer.start, enhancer.end)
        stream.write(str(_centered_interval(fields[0], center, name)))
        counter += 1
    return counter


filename = "enhancers.bed"
print("Writing", filename)
# The context manager guarantees the output file is flushed and closed even
# if one of the bedtools operations or a malformed record raises.
with open(filename, "w") as stream:
    fantom5_counter = _write_labeled(stream, fantom5_enhancers, "FANTOM5")
    print(fantom5_counter, "FANTOM5 enhancers")

    # HiSeq enhancers that overlap neither a CAGE nor a FANTOM5 enhancer.
    hiseq_only = hiseq_enhancers.intersect(cage_enhancers, v=True).intersect(fantom5_enhancers, v=True)
    hiseq_counter = _write_labeled(stream, hiseq_only, "HiSeq")
    print(hiseq_counter, "enhancers discovered in HiSeq only")

    # CAGE enhancers that overlap neither a HiSeq nor a FANTOM5 enhancer.
    cage_only = cage_enhancers.intersect(hiseq_enhancers, v=True).intersect(fantom5_enhancers, v=True)
    cage_counter = _write_labeled(stream, cage_only, "CAGE")
    print(cage_counter, "enhancers discovered in CAGE only")

    # Regions shared by HiSeq and CAGE: the two directional intersections are
    # concatenated, with column 7 (thickStart) carried through using the
    # "last" operation, and regions overlapping FANTOM5 are then excluded.
    both = hiseq_enhancers.intersect(cage_enhancers).cat(cage_enhancers.intersect(hiseq_enhancers),c=(7,),o=("last",)).intersect(fantom5_enhancers, v=True)
    both_counter = _write_both(stream, both)
    print(both_counter, "enhancers discovered in HiSeq and CAGE")

print("Total number of enhancers:", fantom5_counter+hiseq_counter+cage_counter+both_counter)
